#This script executes regressions to estimate the association between
#first-generation prenatal exposure to nonattainment and second-generation high school completion and earnings.

rm(list=ls())
gc()
library(reldist)
library(ineq)
library(stargazer)
library(data.table)
library(dplyr)
library(dtplyr)
library(ggplot2)
library(stringdist)
library(lfe)
library(haven)
library(readr)
library(broom)
library(tidylog)
library(rdrobust)

setwd("/projects/opp_env/caa1970/")

source("Code/Child_Outcomes/child_outcomes_functions.R")
source("Code/rounding.r")

load("Data/second_gen_college_analysis_data_jpemicro.rda")
load("Data/second_gen_earnings_23_plus_analysis_data_jpemicro.rda")

Table_A5 <- data.frame()

# Panel A, Column 1 -- HS Diploma 

TabA5_A1 <- felm(HS_grad~ #LHS 
                  factor(parent_race)+factor(parent_female)+PI_PC*year_trend+
                  PI_PC*I(year_trend^2) +epop*year + total_transfers*year_trend + 
                  county_pop*year_trend + epop*I(year_trend^2) + 
                  total_transfers*I(year_trend^2) + county_pop*I(year_trend^2) +
                  tmax_fullterm+ tmin_fullterm+prec_fullterm+anyprec_fullterm | #Controls
                  state_year+county_factor+year_factor+month+
                  survey_year_by_year | #Fixed Effects
                  (all_fullterm~nonattainment_infant) | #IV spec
                  county_factor, #Cluster
                data=regdata %>% filter(year%in%1969:1975, !is.na(all_fullterm),!is.na(parent_race), !is.na(PI_PC))) 

summary(TabA5_A1)
TabA5_A1$N
temp_n<-  rounding_rules_counts(TabA5_A1$N)
temp_ctrl <- signif(mean((regdata %>% filter(year%in%1969:1975, !is.na(all_fullterm), !is.na(HS_grad),!is.na(parent_race), !is.na(PI_PC),nonattainment_infant == 0))$HS_grad, na.rm=T ),4)
temp_f <-  signif(TabA5_A1$stage1$iv1fstat[[1]]["F"],4)
temp_results <- tidy(TabA5_A1) %>% filter(term == "`all_fullterm(fit)`" ) %>% #tidy regression, keep coef of interest
  mutate_each(funs(signif(.,4)), -term) %>% #round appropriately
  mutate(N=temp_n, ctrl_mean=temp_ctrl, fstat = temp_f,  table = 2, column ="A2" ) #add variables for N and control mean, plus index the table number and column
Table_A5 <- bind_rows(Table_A5, temp_results) #Appended to big dataset

# Panel A, Column 2 -- log real wages
TabA5_A2 <- felm(log_real_wages~ #LHS 
                  factor(parent_race)+factor(parent_female)+PI_PC*year_trend+
                  PI_PC*I(year_trend^2) +epop*year + total_transfers*year_trend + 
                  county_pop*year_trend + epop*I(year_trend^2) + 
                  total_transfers*I(year_trend^2) + county_pop*I(year_trend^2) +
                  tmax_fullterm+ tmin_fullterm+prec_fullterm+anyprec_fullterm | #Controls
                  state_year+county_factor+year_factor+month+
                  survey_year_by_year | #Fixed Effects
                  (all_fullterm~nonattainment_infant) | #IV spec
                  county_factor, #Cluster
                data=regdata %>% filter(year%in%1969:1975,real_wages > 0, real_wages < wage_cap, !is.na(all_fullterm),!is.na(parent_race), !is.na(PI_PC))) 

summary(TabA5_A2)
TabA5_A2$N
temp_n<-  rounding_rules_counts(TabA5_A2$N)
temp_ctrl <- signif(mean((regdata %>% filter(year%in%1969:1975, real_wages > 0, real_wages < wage_cap, !is.na(all_fullterm), !is.na(log_real_wages),!is.na(parent_race), !is.na(PI_PC),nonattainment_infant == 0))$real_wages, na.rm=T ),4)
temp_f <-  signif(TabA5_A2$stage1$iv1fstat[[1]]["F"],4)
temp_results <- tidy(TabA5_A2) %>% filter(term == "`all_fullterm(fit)`" ) %>% #tidy regression, keep coef of interest
  mutate_each(funs(signif(.,4)), -term) %>% #round appropriately
  mutate(N=temp_n, ctrl_mean=temp_ctrl, fstat = temp_f,  table = "X", column ="A2" ) #add variables for N and control mean, plus index the table number and column
Table_A5 <- bind_rows(Table_A5, temp_results) #Appended to big dataset

# Panel A, Column 3 -- positive real earnings 

TabA5_A3 <- felm(real_wages~ #LHS 
                  factor(parent_race)+factor(parent_female)+PI_PC*year_trend+
                  PI_PC*I(year_trend^2) +epop*year + total_transfers*year_trend + 
                  county_pop*year_trend + epop*I(year_trend^2) + 
                  total_transfers*I(year_trend^2) + county_pop*I(year_trend^2) +
                  tmax_fullterm+ tmin_fullterm+prec_fullterm+anyprec_fullterm | #Controls
                  state_year+county_factor+year_factor+month+
                  survey_year_by_year | #Fixed Effects
                  (all_fullterm~nonattainment_infant) | #IV spec
                  county_factor, #Cluster
                data=regdata %>% filter(year%in%1969:1975, real_wages > 0, real_wages < wage_cap, !is.na(all_fullterm),!is.na(parent_race), !is.na(PI_PC))) 

summary(TabA5_A3)
TabA5_A3$N
temp_n<-  rounding_rules_counts(TabA5_A3$N)
temp_ctrl <- signif(mean((regdata %>% filter(year%in%1969:1975, real_wages > 0,real_wages < wage_cap, !is.na(all_fullterm),!is.na(parent_race), !is.na(PI_PC),nonattainment_infant == 0))$real_wages, na.rm=T ),4)
temp_f <-  signif(TabA5_A3$stage1$iv1fstat[[1]]["F"],4)
temp_results <- tidy(TabA5_A3) %>% filter(term == "`all_fullterm(fit)`" ) %>% #tidy regression, keep coef of interest
  mutate_each(funs(signif(.,4)), -term) %>% #round appropriately
  mutate(N=temp_n, ctrl_mean=temp_ctrl, fstat = temp_f,  table = "X", column ="A2" ) #add variables for N and control mean, plus index the table number and column
Table_A5 <- bind_rows(Table_A5, temp_results) #Appended to big dataset

# Panel A, Column 4 -- real earnings 

TabA5_A4 <- felm(real_wages~ #LHS 
                  factor(parent_race)+factor(parent_female)+PI_PC*year_trend+
                  PI_PC*I(year_trend^2) +epop*year + total_transfers*year_trend + 
                  county_pop*year_trend + epop*I(year_trend^2) + 
                  total_transfers*I(year_trend^2) + county_pop*I(year_trend^2) +
                  tmax_fullterm+ tmin_fullterm+prec_fullterm+anyprec_fullterm | #Controls
                  state_year+county_factor+year_factor+month+
                  survey_year_by_year | #Fixed Effects
                  (all_fullterm~nonattainment_infant) | #IV spec
                  county_factor, #Cluster
                data=regdata %>% filter(year%in%1969:1975, real_wages < wage_cap, !is.na(all_fullterm),!is.na(parent_race), !is.na(PI_PC))) 

summary(TabA5_A4)
TabA5_A4$N
temp_n<-  rounding_rules_counts(TabA5_A4$N)
temp_ctrl <- signif(mean((regdata %>% filter(year%in%1969:1975, real_wages < wage_cap, !is.na(all_fullterm),!is.na(parent_race), !is.na(PI_PC),nonattainment_infant == 0))$real_wages, na.rm=T ),4)
temp_f <-  signif(TabA5_A4$stage1$iv1fstat[[1]]["F"],4)
temp_results <- tidy(TabA5_A4) %>% filter(term == "`all_fullterm(fit)`" ) %>% #tidy regression, keep coef of interest
  mutate_each(funs(signif(.,4)), -term) %>% #round appropriately
  mutate(N=temp_n, ctrl_mean=temp_ctrl, fstat = temp_f,  table = "X", column ="A2" ) #add variables for N and control mean, plus index the table number and column
Table_A5 <- bind_rows(Table_A5, temp_results) #Appended to big dataset


# Panel B, Column 1 -- High School Diploma

TabA5_B1 <- felm(HS_grad~ #LHS 
                  factor(parent_race)+factor(parent_female)+PI_PC*year_trend+
                  PI_PC*I(year_trend^2) +epop*year + total_transfers*year_trend + 
                  county_pop*year_trend + epop*I(year_trend^2) + 
                  total_transfers*I(year_trend^2) + county_pop*I(year_trend^2) +
                  tmax_fullterm+ tmin_fullterm+prec_fullterm+anyprec_fullterm + nonattainment_infant | #Controls
                  state_year+county_factor+year_factor+month+
                  survey_year_by_year | 0 | county_factor, data=regdata %>% filter(year%in%1969:1975, !is.na(all_fullterm),!is.na(parent_race), !is.na(PI_PC)))

summary(TabA5_B1)
TabA5_B1$N
temp_n<-  rounding_rules_counts(TabA5_B1$N)
temp_ctrl <- signif(mean((regdata %>% filter(year%in%1969:1975, !is.na(all_fullterm), !is.na(HS_grad),!is.na(parent_race), !is.na(PI_PC),nonattainment_infant == 0))$HS_grad, na.rm=T ),4)
temp_results <- tidy(TabA5_B1) %>% filter(term == "nonattainment_infant" ) %>% #tidy regression, keep coef of interest
  mutate_each(funs(signif(.,4)), -term) %>% #round appropriately
  mutate(N=temp_n, ctrl_mean=temp_ctrl, table = 2, column ="B2" ) #add variables for N and control mean, plus index the table number and column
Table_A5 <- bind_rows(Table_A5, temp_results) #Appended to big dataset

# Panel B, Column 2 -- log real wages

TabA5_B2 <- felm(log_real_wages~ #LHS 
                  factor(parent_race)+factor(parent_female)+PI_PC*year_trend+
                  PI_PC*I(year_trend^2) +epop*year + total_transfers*year_trend + 
                  county_pop*year_trend + epop*I(year_trend^2) + 
                  total_transfers*I(year_trend^2) + county_pop*I(year_trend^2) +
                  tmax_fullterm+ tmin_fullterm+prec_fullterm+anyprec_fullterm + nonattainment_infant | #Controls
                  state_year+county_factor+year_factor+month+
                  survey_year_by_year | 0 | county_factor, data=regdata %>% filter(year%in%1969:1975, real_wages>0, real_wages < wage_cap, !is.na(all_fullterm),!is.na(parent_race), !is.na(PI_PC))) 

summary(TabA5_B2)
TabA5_B2$N
temp_n<-  rounding_rules_counts(TabA5_B2$N)
temp_ctrl <- signif(mean((regdata %>% filter(year%in%1969:1975,!is.na(all_fullterm), real_wages>0, real_wages < wage_cap, !is.na(log_real_wages), !is.na(parent_race), !is.na(PI_PC),nonattainment_infant == 0))$real_wages, na.rm=T ),4)
temp_results <- tidy(TabA5_B2) %>% filter(term == "nonattainment_infant" ) %>% #tidy regression, keep coef of interest
  mutate_each(funs(signif(.,4)), -term) %>% #round appropriately
  mutate(N=temp_n, ctrl_mean=temp_ctrl, table = "X", column ="B2" ) #add variables for N and control mean, plus index the table number and column
Table_A5 <- bind_rows(Table_A5, temp_results) #Appended to big dataset

# Panel B, Column 3 -- positive real earnings

TabA5_B3 <- felm(real_wages~ #LHS 
                  factor(parent_race)+factor(parent_female)+PI_PC*year_trend+
                  PI_PC*I(year_trend^2) +epop*year + total_transfers*year_trend + 
                  county_pop*year_trend + epop*I(year_trend^2) + 
                  total_transfers*I(year_trend^2) + county_pop*I(year_trend^2) +
                  tmax_fullterm+ tmin_fullterm+prec_fullterm+anyprec_fullterm + nonattainment_infant | #Controls
                  state_year+county_factor+year_factor+month+
                  survey_year_by_year | 0 | county_factor, data=regdata %>% filter(year%in%1969:1975, real_wages > 0,real_wages < wage_cap, !is.na(all_fullterm),!is.na(parent_race), !is.na(PI_PC))) 

summary(TabA5_B3)
TabA5_B3$N
temp_n<-  rounding_rules_counts(TabA5_B3$N)
temp_ctrl <- signif(mean((regdata %>% filter(year%in%1969:1975, real_wages > 0,real_wages < wage_cap, !is.na(all_fullterm),!is.na(parent_race), !is.na(PI_PC),nonattainment_infant == 0))$real_wages, na.rm=T ),4)
temp_results <- tidy(TabA5_B3) %>% filter(term == "nonattainment_infant" ) %>% #tidy regression, keep coef of interest
  mutate_each(funs(signif(.,4)), -term) %>% #round appropriately
  mutate(N=temp_n, ctrl_mean=temp_ctrl, table = "X", column ="B2" ) #add variables for N and control mean, plus index the table number and column
Table_A5 <- bind_rows(Table_A5, temp_results) #Appended to big dataset

# Panel B, Column 4 -- real earnings

TabA5_B4 <- felm(real_wages~ #LHS 
                  factor(parent_race)+factor(parent_female)+PI_PC*year_trend+
                  PI_PC*I(year_trend^2) +epop*year + total_transfers*year_trend + 
                  county_pop*year_trend + epop*I(year_trend^2) + 
                  total_transfers*I(year_trend^2) + county_pop*I(year_trend^2) +
                  tmax_fullterm+ tmin_fullterm+prec_fullterm+anyprec_fullterm + nonattainment_infant | #Controls
                  state_year+county_factor+year_factor+month+
                  survey_year_by_year | 0 | county_factor, data=regdata %>% filter(year%in%1969:1975, real_wages < wage_cap, !is.na(all_fullterm),!is.na(parent_race), !is.na(PI_PC))) 

summary(TabA5_B4)
TabA5_B4$N
temp_n<-  rounding_rules_counts(TabA5_B4$N)
temp_ctrl <- signif(mean((regdata %>% filter(year%in%1969:1975, real_wages < wage_cap, !is.na(all_fullterm),!is.na(parent_race), !is.na(PI_PC),nonattainment_infant == 0))$real_wages, na.rm=T ),4)
temp_results <- tidy(TabA5_B4) %>% filter(term == "nonattainment_infant" ) %>% #tidy regression, keep coef of interest
  mutate_each(funs(signif(.,4)), -term) %>% #round appropriately
  mutate(N=temp_n, ctrl_mean=temp_ctrl, table = "X", column ="B2" ) #add variables for N and control mean, plus index the table number and column
Table_A5 <- bind_rows(Table_A5, temp_results) #Appended to big dataset

write_csv(Table_A5, "/projects/opp_env/caa1970/JPE_Micro/Output/Table A5/Table_A5.csv")

